source(here::here("code/load.R"))

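# `ecoef` gives unstandardized coefficients but is by far the most complete variable.
# `ecoe` is a consistent IV (effect of $1 of gov contributions on giving).
# For consistency, and following figure 1 of the MA, the stated intent was to
# take only rows where ecoef == ecoe.
# NOTE(review): the pipeline below does not apply that row filter; it uses
# `ecoe` as the estimate and divides `ecoefse` by ecoef / ecoe instead.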

# There is a typo in the MA data: the SE for Brooks 2000 is recorded as 1732.
# The original value, from Table 4 in https://doi.org/10.1111/0033-3352.00081, is 1.732.

# I also checked the 0.001 SE in "How Does the Incentive Effect of the Charitable Deduction Vary across Charities?" and it is correct.


# Read the raw deWBek2017 spreadsheet. Cells containing "888" or "999" are
# passed to `na =` so they are read as missing values.
dat <- read_excel(
  here("data/meta_analyses_raw/deWBek2017/Data-GovernmentSupportAndCharitableDonations.xls"),
  na = c("888", "999")
)

# VAB: Compare missing % for each variable in the two subsets.
# I think we need to use `ecoe`, which gives us sample sizes similar to what
# they have in Table 4. Note that Table 3 has a bigger sample size, but they
# don't look at effect sizes at all, only at the sign of coefficients using a
# logit. So my guess is that there was an issue with standardization, and they
# only kept the smaller sample in `ecoe`. Also note that if we only keep the
# rows where ecoef==ecoe, then we throw out nearly all of the experimental
# studies. 

# data.frame(
#     experiment = sapply(filter(dat, sdtype %in% c(1:2, 5)), function(x) sum(!is.na(x))),
#     nonexperiment = sapply(filter(dat, sdtype %in% 3:4), function(x) sum(!is.na(x))))
# 
# unique(dat$ecoef / dat$ecoe)


# Build the cleaned deWBek2017 table and write it to
# data/meta_analyses_clean/deWBek2017.csv.
#
# `ecoe` is used as the estimate. `ecoefse` is divided by ecoef / ecoe,
# presumably to express the standard error on the same scale as `ecoe`.
dat |>
  mutate(
    meta_id = "deWBek2017",
    question = "Does gov funding crowd in or out private donations?",
    # Use the pipeline's own columns (not `dat$...`) so the ratio always lines
    # up with the rows currently being processed.
    scaling_multiple = ecoef / ecoe,
    estimate = ecoe,
    std.error = ecoefse / scaling_multiple,
    study_year = idyear,
    study_id = paste(idauth, idyear, idjour),
    # sdtype codes 1, 2 and 5 are labelled experimental, 3 and 4
    # nonexperimental; any other code yields NA.
    subset = case_when(
      sdtype %in% c(1:2, 5) ~ "experimental_studies",
      sdtype %in% 3:4 ~ "nonexperimental_studies"
    ),
    overall_effect = "both",
    subfield = "PA"
  ) |>
  # Fix a typo in the meta-analysis data: the SE for Brooks 2000 is recorded
  # as 1732, but Table 4 of https://doi.org/10.1111/0033-3352.00081 reports
  # 1.732. `near()` is used instead of `==` because std.error is a derived
  # floating-point value.
  mutate(std.error = if_else(near(std.error, 1732), 1.732, std.error)) |>
  # Keep rows with a standard error of at least 0.001; filter() also drops
  # rows where the standard error is NA.
  filter(std.error >= 0.001) |>
  select(
    meta_id,
    subfield,
    question,
    study_id,
    study_year,
    estimate,
    std.error,
    subset,
    overall_effect
  ) |>
  write_csv(here("data/meta_analyses_clean/deWBek2017.csv"))

